From cf2e70369531203194b9739fa7282018f1579c2d Mon Sep 17 00:00:00 2001
From: =?utf8?q?=C3=98yvind=20Kol=C3=A5s?= <pippin@gimp.org>
Date: Thu, 31 Aug 2017 05:10:24 +0200
Subject: [PATCH] babl: optimize powf and its use in u8tou8 internal converter

---
 babl/babl-fish-path.c | 137 +++++++++++++++++++++++++-----------------
 babl/babl-internal.h  |   2 +-
 2 files changed, 82 insertions(+), 57 deletions(-)

diff --git a/babl/babl-fish-path.c b/babl/babl-fish-path.c
index dd7c018..4fe5ded 100644
--- a/babl/babl-fish-path.c
+++ b/babl/babl-fish-path.c
@@ -183,16 +183,17 @@ get_conversion_path (PathContext *pc,
       double path_cost  = 0.0;
       double ref_cost   = 0.0;
       double path_error = 1.0;
+#if 0
       int    i;
-
       for (i = 0; i < babl_list_size (pc->current_path); i++)
         {
           path_error *= (1.0 + babl_conversion_error ((BablConversion *) pc->current_path->items[i]));
         }
 
-      //if (path_error - 1.0 <= _babl_legal_error ())
+      if (path_error - 1.0 <= _babl_legal_error ())
                 /* check this before the more accurate measurement of error -
                    to bail earlier */
+#endif
         {
           FishPathInstrumentation fpi;
           memset (&fpi, 0, sizeof (fpi));
@@ -201,6 +202,7 @@ get_conversion_path (PathContext *pc,
           fpi.destination = pc->to_format;
 
           get_path_instrumentation (&fpi, pc->current_path, &path_cost, &ref_cost, &path_error);
+          //path_cost += pc->current_path->count * 1000; // punish long chains
           if(debug_conversions && current_length == 1)
             fprintf (stderr, "%s  error:%f cost:%f  \n",
                  babl_get_name (pc->current_path->items[0]),
@@ -211,7 +213,8 @@ get_conversion_path (PathContext *pc,
 
           if ((path_cost < ref_cost) && /* do not use paths that took longer to compute than reference */
               (path_cost < pc->fish_path->fish_path.cost) &&
-              (path_error <= _babl_legal_error ()))
+              (path_error <= _babl_legal_error ())
+              )
             {
               /* We have found the best path so far,
                * let's copy it into our new fish */
@@ -453,11 +456,12 @@ universal_nonlinear_rgb_converter (const Babl *conversion,unsigned char *src_cha
   float (*from_linear_blue) (void *trc, float value);
 
   float * matrixf = conversion->conversion.data;
+  float  mat[9] = {matrixf[0], matrixf[1],matrixf[2],
+                   matrixf[3], matrixf[4],matrixf[5],
+                   matrixf[6], matrixf[7],matrixf[8]};
   int i;
   float *rgba_in = (void*)src_char;
   float *rgba_out = (void*)dst_char;
-  assert (source_space);
-  assert (destination_space);
 
   to_linear_red   = (void*)source_space->space.trc[0]->trc.fun_to_linear;
   to_trc_red      = (void*)source_space->space.trc[0];
@@ -476,13 +480,13 @@ universal_nonlinear_rgb_converter (const Babl *conversion,unsigned char *src_cha
 
   for (i = 0; i < samples; i++)
   {
-    float rgba_tmp[4];
+    float rgb_tmp[3]={
+       to_linear_red(to_trc_red, rgba_in[0]),
+       to_linear_green(to_trc_green, rgba_in[1]),
+       to_linear_blue(to_trc_blue, rgba_in[2])
+    };
 
-    rgba_tmp[0] = to_linear_red(to_trc_red, rgba_in[0]);
-    rgba_tmp[1] = to_linear_green(to_trc_green, rgba_in[1]);
-    rgba_tmp[2] = to_linear_blue(to_trc_blue, rgba_in[2]);
-
-    babl_matrix_mul_vectorff (matrixf, rgba_tmp, rgba_out);
+    babl_matrix_mul_vectorff (mat, rgb_tmp, rgba_out);
 
     rgba_out[0] = from_linear_red(from_trc_red, rgba_out[0]);
     rgba_out[1] = from_linear_green(from_trc_green, rgba_out[1]);
@@ -495,55 +499,82 @@ universal_nonlinear_rgb_converter (const Babl *conversion,unsigned char *src_cha
   return samples;
 }
 
-#if 1
-// does not seem to be valid
+#if 0
 static inline long
 universal_nonlinear_rgba_u8_converter (const Babl *conversion,unsigned char *src_char, unsigned char *dst_char, long samples)
 {
   const Babl *destination_space = conversion->conversion.destination->format.space;
 
   float * matrixf = conversion->conversion.data;
+  float  mat[9] = {matrixf[0], matrixf[1],matrixf[2],
+                   matrixf[3], matrixf[4],matrixf[5],
+                   matrixf[6], matrixf[7],matrixf[8]};
   float * in_trc_lut = matrixf + 9;
   int i;
   uint8_t *rgba_in_u8 = (void*)src_char;
   uint8_t *rgba_out_u8 = (void*)dst_char;
 
-  void   *from_trc_red;
-  void   *from_trc_green;
-  void   *from_trc_blue;
-  float (*from_linear_red) (void *trc, float value);
-  float (*from_linear_green) (void *trc, float value);
-  float (*from_linear_blue) (void *trc, float value);
+  const Babl *from_trc_red   = (void*)destination_space->space.trc[0];
+  const Babl *from_trc_green = (void*)destination_space->space.trc[1];
+  const Babl *from_trc_blue  = (void*)destination_space->space.trc[2];
+  float (*from_linear_red)   (const Babl *trc, float value) = from_trc_red->trc.fun_from_linear;
+  float (*from_linear_green) (const Babl *trc, float value) = from_trc_green->trc.fun_from_linear;
+  float (*from_linear_blue)  (const Babl *trc, float value) = from_trc_blue->trc.fun_from_linear;
 
-  from_linear_red = (void*)destination_space->space.trc[0]->trc.fun_from_linear;
-  from_trc_red    = (void*)destination_space->space.trc[0];
-  from_linear_green= (void*)destination_space->space.trc[1]->trc.fun_from_linear;
-  from_trc_green  = (void*)destination_space->space.trc[1];
-  from_linear_blue= (void*)destination_space->space.trc[2]->trc.fun_from_linear;
-  from_trc_blue   = (void*)destination_space->space.trc[2];
 
   for (i = 0; i < samples; i++)
   {
-    float rgb[3];
-    int c;
-    for (c = 0; c < 3; c ++)
-      rgb[c] = in_trc_lut[rgba_in_u8[c]]; 
+    float rgb[3]={in_trc_lut[rgba_in_u8[0]],
+                  in_trc_lut[rgba_in_u8[1]],
+                  in_trc_lut[rgba_in_u8[2]]};
 
-    babl_matrix_mul_vectorff (matrixf, rgb, rgb);
+    babl_matrix_mul_vectorff (mat, rgb, rgb);
 
-    {
-      int v = from_linear_red (from_trc_red, rgb[0]) * 255.5;
-      rgba_out_u8[0] = v; //v < 0 ?  0 : v > 255 ? 255 : v;
-    }
-    {
-      int v = from_linear_green (from_trc_green, rgb[1]) * 255.5;
-      rgba_out_u8[1] = v; //v < 0 ?  0 : v > 255 ? 255 : v;
-    }
-    {
-      int v = from_linear_blue (from_trc_blue , rgb[2]) * 255.5;
-      rgba_out_u8[2] = v; //v < 0 ?  0 : v > 255 ? 255 : v;
-    }
+    rgba_out_u8[0] = from_linear_red (from_trc_red, rgb[0]) * 255.5f;
+    rgba_out_u8[1] = from_linear_green (from_trc_green, rgb[1]) * 255.5f;
+    rgba_out_u8[2] = from_linear_blue (from_trc_blue , rgb[2]) * 255.5f;
+    rgba_out_u8[3] = rgba_in_u8[3];
+    rgba_in_u8  += 4;
+    rgba_out_u8 += 4;
+  }
 
+  return samples;
+}
+#else
+
+static inline long
+universal_nonlinear_rgba_u8_converter (const Babl *conversion,unsigned char *src_char, unsigned char *dst_char, long samples)
+{
+  const Babl *destination_space = conversion->conversion.destination->format.space;
+
+  float * matrixf = conversion->conversion.data;
+  float  mat[9] = {matrixf[0], matrixf[1],matrixf[2],
+                   matrixf[3], matrixf[4],matrixf[5],
+                   matrixf[6], matrixf[7],matrixf[8]};
+  float * in_trc_lut = matrixf + 9;
+  int i;
+  uint8_t *rgba_in_u8 = (void*)src_char;
+  uint8_t *rgba_out_u8 = (void*)dst_char;
+
+  const Babl *from_trc_red   = (void*)destination_space->space.trc[0];
+  const Babl *from_trc_green = (void*)destination_space->space.trc[1];
+  const Babl *from_trc_blue  = (void*)destination_space->space.trc[2];
+  float (*from_linear_red)   (const Babl *trc, float value) = from_trc_red->trc.fun_from_linear;
+  float (*from_linear_green) (const Babl *trc, float value) = from_trc_green->trc.fun_from_linear;
+  float (*from_linear_blue)  (const Babl *trc, float value) = from_trc_blue->trc.fun_from_linear;
+
+
+  for (i = 0; i < samples; i++)
+  {
+    float rgb[3]={in_trc_lut[rgba_in_u8[0]],
+                  in_trc_lut[rgba_in_u8[1]],
+                  in_trc_lut[rgba_in_u8[2]]};
+
+    babl_matrix_mul_vectorff (mat, rgb, rgb);
+
+    rgba_out_u8[0] = from_linear_red (from_trc_red, rgb[0]) * 255.5f;
+    rgba_out_u8[1] = from_linear_green (from_trc_green, rgb[1]) * 255.5f;
+    rgba_out_u8[2] = from_linear_blue (from_trc_blue , rgb[2]) * 255.5f;
     rgba_out_u8[3] = rgba_in_u8[3];
     rgba_in_u8  += 4;
     rgba_out_u8 += 4;
@@ -557,13 +588,16 @@ static inline long
 universal_rgb_converter (const Babl *conversion,unsigned char *src_char, unsigned char *dst_char, long samples)
 {
   float *matrixf = conversion->conversion.data;
+  float  mat[9] = {matrixf[0], matrixf[1],matrixf[2],
+                   matrixf[3], matrixf[4],matrixf[5],
+                   matrixf[6], matrixf[7],matrixf[8]};
   int i;
   float *rgba_in = (void*)src_char;
   float *rgba_out = (void*)dst_char;
 
   for (i = 0; i < samples; i++)
   {
-    babl_matrix_mul_vectorff (matrixf, rgba_in, rgba_out);
+    babl_matrix_mul_vectorff (mat, rgba_in, rgba_out);
     rgba_out[3] = rgba_in[3];
     rgba_in  += 4;
     rgba_out += 4;
@@ -578,6 +612,7 @@ add_rgb_adapter (Babl *babl,
 {
   if (babl != space)
   {
+#if 1
     prep_conversion(babl_conversion_new(babl_format_with_space("RGBA float", space),
                         babl_format_with_space("RGBA float", babl),
                         "linear", universal_rgb_converter,
@@ -586,7 +621,7 @@ add_rgb_adapter (Babl *babl,
                         babl_format_with_space("RGBA float", space),
                         "linear", universal_rgb_converter,
                         NULL));
-
+#endif
     prep_conversion(babl_conversion_new(babl_format_with_space("R'G'B'A float", space),
                         babl_format_with_space("R'G'B'A float", babl),
                         "linear", universal_nonlinear_rgb_converter,
@@ -652,9 +687,6 @@ babl_fish_path (const Babl *source,
       babl_conversion_class_for_each (alias_conversion, (void*)source->format.space);
 
       add_universal_rgb (source->format.space);
-
-
-
     }
     if ((done & 2) == 0 && (destination->format.space != source->format.space) && (destination->format.space != sRGB))
     {
@@ -969,15 +1001,8 @@ init_path_instrumentation (FishPathInstrumentation *fpi,
 
   if (!fpi->fmt_rgba_double)
     {
-      fpi->fmt_rgba_double = babl_format_new (
-        babl_model ("RGBA"),
-        babl_space ("sRGB"),
-        babl_type ("double"),
-        babl_component ("R"),
-        babl_component ("G"),
-        babl_component ("B"),
-        babl_component ("A"),
-        NULL);
+      fpi->fmt_rgba_double = babl_format_with_space ("RGBA double",
+                                                     fmt_destination->format.space);
     }
 
   fpi->num_test_pixels = babl_get_num_path_test_pixels ();
diff --git a/babl/babl-internal.h b/babl/babl-internal.h
index 59e484a..61d6e09 100644
--- a/babl/babl-internal.h
+++ b/babl/babl-internal.h
@@ -28,7 +28,7 @@
 #endif
 
 #define BABL_MAX_COMPONENTS       32
-#define BABL_CONVERSIONS           5
+#define BABL_CONVERSIONS          5
 
 #include <stdlib.h>
 #include <stdio.h>
-- 
2.30.2